

*****************
*This table makes table 1
*****************


use "stata data/main regs dataset", clear
keep sa
gduplicates drop
save "stata data/new sas used", replace

use "stata data/A1 data -8 to 8 HUGE", clear //know just want the 38/42 part

gen dow = dow(date)
*keep if inrange(dow,1,5)
*drop if date == mdy(7,4,2014) //july 4th
*drop if date == mdy(9,1,2014) //Labour day

drop if inrange(hour,14,17) & event_day == 1 //this is event hours on event days


*keep if event_day == 1

fcollapse (sum) kwh, by(sa year)
rename kwh kwh_non_event
save "stata data/non_event_hour_consumption", replace




use "stata data/main regs dataset", clear

egen max_eligible = max(eligible), by(sa)
fcollapse (mean) kwh_avg = kwh (max) kwh_max=kwh (sum) kwh_event = kwh, by(sa max_eligible year)

save "stata data/event_hour_consumption", replace


//create some consumption data 

use "stata data/A1 Standard regression dataset 1-22-2016", clear
keep if inrange(bin,-8,8)
capture drop _merge
merge m:1 sa using "stata data/saids used in main analysis"
keep if _merge == 3
drop _merge




preserve
fcollapse (mean) temp_f , by(sa climate time )
save "stata data/A1 pdp average 2014 and 2015 summer temp by sa and climate", replace
restore 

keep if year == 2014		
 
fcollapse (mean) kwh_avg = kwh (max) kwh_max=kwh , by(sa bin)
save "stata data/A1 pdp average 2014 kwhs", replace
		

		
///This is how I get billing data into the table. I was sent this by PG&E and I really only use it here since it doesn't quite match up with what I want for other things	
fdause  "raw data/DR150504.TOUSMED.EBIL14.xpt", clear

rename sa_id sa

drop dmdof* dmdon* kwhof* kwhon* kwhpt* dmdpt*

drop prem 
reshape long cdt__ kwh__ pdt__ rsch__ rev__ dmd__ , i(sa) j(month)

gen bill_days = cdt - pdt  //don't need extra day in there. Just works out that way when expanding. Example testing shows this. 

expand bill_days
gen rev_day = rev / bill_days

sort month

bysort sa month: gen day_counter = _n -1

gen date = day_counter + pdt
format date %td

gen date_ym = mofd(date)
format date_ym %tm

collapse (sum) rev_day, by(sa date_ym month)

rename rev_day bill_amount

keep if inrange(month,6,10)

collapse (sum) bill_amount, by(sa)


save "stata data/2014 billing data from regressions", replace

		
		
		

///start building the table 
use "stata data/A1 smart meter install date with interesting info", clear


//naics classifications used in regressions

gen customer_facing = 0
replace customer_facing = 1 if inlist(naics_most_2,44,45,71,72, 62)
replace customer_facing = . if naics_most_2 == .

gen no_customer_facing = 0
replace no_customer_facing = 1 if customer_facing == 0
replace no_customer_facing = . if naics_most_2 == .

//merge in Structurial winner stuff
*merge 1:1 sa using "stata data/struct winner by sa"
merge 1:1 sa using "stata data/structural winner 2014 raw calcultations"
drop if _merge == 2 //large portion dropped
drop _merge

drop kwh_non_event kwh_event
drop total_kwh

merge 1:1 sa using "stata data/new sas used"
keep if _merge == 3
drop _merge

capture gen year = 2014

//pull in event statistics
merge 1:1 sa year using "stata data/event_hour_consumption"
keep if _merge == 3
drop _merge

merge 1:1 sa year using "stata data/non_event_hour_consumption"
keep if _merge == 3
drop _merge

//merge in tempreature data
merge 1:1 sa using "stata data/A1 pdp average 2014 and 2015 summer temp by sa and climate"
drop if _merge == 2
drop _merge 


merge 1:1 sa using "stata data/2014 billing data from regressions"
drop if _merge == 2 //missing billing data for around 6,000 customers, all from the new data drop
drop _merge


replace bill_amount = bill_amount / 5 //this is 5 months of bills

*label variable goods "Percent goods producing industries"		
*label variable service "Percent service providing industries"		
label variable temp_f "Average summer peak hours temperature (F)"
label variable kwh_avg "Average peak hours kWh consumption"
label variable kwh_max "Max peak hours kWh consumption"
label variable struct_winner "2014 structural winners"
label variable money_saved "Money saved if program run on 2014 usage"
label variable kwh_non_event "2014 non-event summer consumption (kWh)"
label variable kwh_event "2014 event summer consumption (kWh)"
*label variable has_ee "Energy efficiency installed in pre-period"
label variable bill "Total 2014 summer electricity bill"		



save "stata data/pre table stuff_2014", replace




*******************
**start building table
*******************



global filename jmp_results_ttable_sd_2014

use "stata data/pre table stuff_2014", clear

drop if optional == 1 
gen coastal = 0
replace coastal = 1 if inlist(climate,5,6,2)



save "stata data/run ttest on sample_2014", replace

use "stata data/run ttest on sample_2014", clear

estpost  ttest temp_f bill kwh_avg kwh_max struct_winner money_saved kwh_non_event kwh_event customer_facing	no_customer_facing  coastal if inrange(bin,-8,8) , by(eligible) esample

matrix b = e(mu_1) \ e(mu_2) \ e(p) \ e(count) \ e(N_1) \ e(N_2)

drop *


svmat b, names(col)

xpose, clear varname
rename (v1 v2 v3 v4 v5 v6) (mu_1 mu_2 p count N_1 N_2)
rename _varname variable
order var

save "stata data/ttable summary stats_2014", replace


use "stata data/run ttest on sample_2014", clear

estpost  summarize temp_f bill kwh_avg kwh_max struct_winner money_saved kwh_non_event kwh_event customer_facing ///
no_customer_facing  coastal if inrange(bin,-8,8) & eligible == 0

matrix b = e(sd)
drop *
svmat b, names(col)
xpose, clear varname
rename v1 sd_ineligible
rename _varname variable
order var
save "stata data/ttable summary stats - SD ineligible_2014", replace


use "stata data/run ttest on sample_2014", clear

estpost  summarize temp_f bill kwh_avg kwh_max struct_winner money_saved kwh_non_event kwh_event customer_facing ///
no_customer_facing  coastal if inrange(bin,-8,8) & eligible == 1

matrix b = e(sd)
drop *
svmat b, names(col)
xpose, clear varname
rename v1 sd_eligible
rename _varname variable
order var
save "stata data/ttable summary stats - SD eligible_2014", replace


use "stata data/ttable summary stats_2014", clear
merge 1:1 variable using "stata data/ttable summary stats - SD ineligible_2014"
drop _merge
merge 1:1 variable using "stata data/ttable summary stats - SD eligible_2014"

gen stars = ""
replace stars = "*" if p<.1
replace stars = "**" if p<.05
replace stars = "***" if p<.01



replace sd_ineligible = round(sd_ineligible) if inlist(variable,"kwh_non_event","kwh_event","money_saved","bill_amount")
replace sd_eligible = round(sd_eligible) if inlist(variable,"kwh_non_event","kwh_event","money_saved","bill_amount")


capture file close table1		
	
file open table1 using "latex/$filename.tex", write text replace


file write table1 "\label{$filename}" _n
file write table1 "\label{$filename}" _n
file write table1 "\vspace{-2mm}" _n
*file write table1 "\footnotesize" _n
file write table1 "\begin{tabular}{@{\extracolsep{4pt}}lccc@{}ccc}" _n //this is where you change justification. Options are l c r
*extracolsep from http://tex.stackexchange.com/questions/101000/how-to-shorten-cline-so-that-multiple-groups-in-a-table-become-visible

file write table1 "\hline\hline \\ [-.9em]" _n 

*file write table1 "\multicolumn{1}{c}{$\begin{matrix}\text{Baseline}\\ \text{Territory} \end{matrix}$}& 	\multicolumn{1}{c}{$\begin{matrix}\text{Average}\\ \text{Temperature} \end{matrix}$} &	\multicolumn{1}{c}{$\begin{matrix}\text{Firm}\\ \text{Count} \end{matrix}$}	&	\\" _n
*file write table1 " Energy use classification && Median & & &Thirds	\\" _n 
*file write table1 "  \cline{2-4} \cline{5-7} \\" _n 
*file write table1 "[0.2em]" _n


local col1_top "Variable"
local col1_bottom 
local col2_top "Ineligible"
local col2_bottom ""
local col3_top "Eligible"
local col3_bottom ""
local col4_top "P value of"
local col4_bottom "difference"



local col1 "\multicolumn{1}{c}{$\begin{matrix}\text{`col1_top'}\\ \text{`col1_bottom'} \end{matrix}$}"
local col2 "\multicolumn{1}{c}{$\begin{matrix}\text{`col2_top'}\\ \text{`col2_bottom'} \end{matrix}$}"
local col3 "\multicolumn{1}{c}{$\begin{matrix}\text{`col3_top'}\\ \text{`col3_bottom'} \end{matrix}$}"
local col4 "\multicolumn{1}{c}{$\begin{matrix}\text{`col4_top'}\\ \text{`col4_bottom'} \end{matrix}$}"

file write table1 " `col1_top' & `col2_top' & `col3_top' & `col4'	\\" _n


*file write table1 "  Variable & Ineligible & Eligible & P value of difference\\" _n


file write table1 "[0.1em]" _n
file write table1 "\hline \\" _n
file write table1 "[-0.7em]" _n



capture gen row = _n



gen label = ""

replace label = "Percent of establishments customer facing" if variable == "customer_facing"
replace label = "Percent of establishments not customer facing" if variable == "no_customer_facing"
replace label = "Average peak hour temperature (F)" if variable == "temp_f_mean"
replace label = "Summer 2014 electricity expenditure" if variable == "bill_amount"
replace label = "Summer 2014 avg peak hourly consumption (kWh)" if variable == "kwh_avg"
replace label = "Summer 2014 max peak hourly consumption (kWh)" if variable == "kwh_max"
replace label = "Structural winners" if variable == "struct_winner"
replace label = "Money saved if program run on 2014 usage" if variable == "money_saved"
replace label = "Summer 2014 average peak hourly consumption (kWh)" if variable == "kwh_non_event"
replace label = "Summer 2014 total non-event hours consumption (kWh)" if variable == "kwh_non_event"
replace label = "Summer 2014 total event hours consumption (kWh)" if variable == "kwh_event"

recast double mu_1 mu_2 p
replace mu_1 = round(mu_1,.01)
replace mu_2 = round(mu_2,.01)


tostring mu_1 mu_2 count p, replace force
replace p = substr(p,1,3)

gen table_mu_1 = mu_1
gen table_mu_2 = mu_2

order variable table*

split mu_1 , p(".")
split mu_2 , p(".")

gen dollar_obs = 0
replace dollar_obs = 1 if inlist(variable,"bill_amount")
gen no_decimal = 0
replace no_decimal = 1 if inlist(variable,"kwh_non_event", "kwh_event", "money_saved", "bill_amount")

replace table_mu_1 = mu_11 if variable == "money_saved"
replace table_mu_2 = mu_21 if variable == "money_saved"

replace table_mu_1 = mu_11 if dollar_obs == 1 | no_decimal == 1
replace table_mu_2 = mu_21 if dollar_obs == 1 | no_decimal == 1

replace table_mu_1 = "\" +"$" + table_mu_1 if dollar_obs == 1
replace table_mu_2 = "\" +"$" + table_mu_2 if dollar_obs == 1


/*
gen pre_comma_1 = substr(table_mu_1,1,2)
gen post_comma_1 = substr(table_mu_1,-3,3)
replace table_mu_1 =  pre_comma_1 + "," + post_comma_1 if variable == "kwh_non_event"

gen pre_comma_2 = substr(table_mu_2,1,2)
gen post_comma_2 = substr(table_mu_2,-3,3)
replace table_mu_2 =  pre_comma_2 + "," + post_comma_2 if variable == "kwh_non_event"
*/

capture drop pre* post*


replace table_mu_1 = regexr(table_mu_1, "-", "-\\\$")
replace table_mu_2 = regexr(table_mu_2, "-", "-\\\$")

//last minute tweak
replace table_mu_2 = "10.00" if table_mu_2 == "10" & variable == "kwh_max" 

///Now do the same string fixing for the standard deviations

recast double sd_ineligible sd_eligible
replace sd_ineligible = round(sd_ineligible,.01)
replace sd_eligible = round(sd_eligible,.01)

tostring sd_eligible sd_ineligible, replace force

gen table_sd_ineligible = sd_ineligible
gen table_sd_eligible = sd_eligible

order variable table_mu_1 table_sd_ineligible table_mu_2 table_sd_eligible

split sd_eligible, p(".")
split sd_ineligible, p(".")


replace table_sd_ineligible = sd_ineligible1 if no_decimal == 1
replace table_sd_eligible = sd_eligible1 if no_decimal == 1


gen pre_comma_1_sd = substr(table_sd_ineligible,1,1)
gen post_comma_1_sd = substr(table_sd_ineligible,-3,3)
replace table_sd_ineligible =  pre_comma_1_sd + "," + post_comma_1_sd if variable == "kwh_non_event"

gen pre_comma_2_sd = substr(table_sd_ineligible,1,1)
gen post_comma_2_sd = substr(table_sd_ineligible,-3,3)
replace table_sd_ineligible =  pre_comma_2_sd + "," + post_comma_2_sd if variable == "kwh_non_event"




//each of the columns for r 



local vars   kwh_avg kwh_max kwh_event kwh_non_event  bill_amount customer_facing money_saved temp_f_mean

*levelsof variable, local(vars)
foreach v of local vars  {
  qui sum row if var=="`v'"
	local r = r(mean)
	*local format = format[`r']
	local t1 = label[`r']
	local t2 = table_mu_1[`r']
	local t2sd = "(" + sd_ineligible[`r']  + ")"
	local t3 = table_mu_2[`r']
	local t3sd = "(" + sd_eligible[`r'] + ")"
	local t4 = p[`r']
	local t5 = stars[`r']
	

	
	file write table1 "`t1' & `t2' &  `t3' &  `t4' `t5'  \\" _n
	file write table1 " &   `t2sd' & `t3sd' &    \\" _n

	file write table1 "[0.3em] " _n
}
*

qui sum N_1
local n1 = string(`r(max)',"%5.0fc")

qui sum N_2
local n2 = string(`r(max)',"%5.0fc")


file write table1 " \\ " _n
file write table1 "Establishment count & `n1' & `n2' \\" _n




file write table1 "\hline" _n
file write table1 "\end{tabular}" _n

file close table1		

local filename "$filename.tex"
	

